import xlrd
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from sklearn.metrics import mean_squared_error, r2_score

# Correlation coefficient of two lists
def Corr_Gust(list1,list2):
    """Return the correlation of two sequences, rounded to four decimals.

    Both inputs are wrapped in ``pandas.Series`` and correlated with
    ``Series.corr`` using its default method.

    Args:
        list1: First sequence of numeric observations.
        list2: Second sequence of numeric observations.

    Returns:
        The correlation coefficient rounded to 4 decimal places.
    """
    first, second = pd.Series(list1), pd.Series(list2)
    return round(first.corr(second), 4)

# Coefficient of determination (R squared)
def R_Square(list1,list2):
    """Return the squared correlation coefficient of two sequences.

    The correlation is taken at full precision rather than from the
    four-decimal value returned by ``Corr_Gust``; squaring an already
    rounded coefficient would carry the rounding error into R squared.

    Args:
        list1: First sequence of numeric observations.
        list2: Second sequence of numeric observations.

    Returns:
        The square of the correlation between the two sequences.
    """
    r = pd.Series(list1).corr(pd.Series(list2))
    return r * r

# Adjusted R squared
def Trim_R2(list1,list2,n,p):
    """Return the adjusted R squared of two sequences.

    Evaluates ``1 - (1 - R^2) * (n - 1) / (n - p - 1)`` with ``R^2`` taken
    from ``R_Square(list1, list2)``.

    Args:
        list1: First sequence of numeric observations.
        list2: Second sequence of numeric observations.
        n: Value used as ``n`` in the formula (the disabled diagnostics in
            this file pass ``len(x_old)``).
        p: Value used as ``p`` in the formula (the disabled diagnostics in
            this file pass ``1``).

    Returns:
        The adjusted R squared value.
    """
    unexplained = 1 - R_Square(list1, list2)
    return 1 - unexplained * (n - 1) / (n - p - 1)

# Sample standard deviation
def S_error(list_):
    """Return the sample standard deviation of ``list_``.

    Uses ``numpy.std`` with ``ddof=1``, i.e. the divisor is ``N - 1``.

    Args:
        list_: Sequence of numeric observations.

    Returns:
        The standard deviation computed with one delta degree of freedom.
    """
    sample_std = np.std(list_, ddof=1)
    return sample_std

# Analysis of variance
def V_analysis(list1,list2):
    """Print the ANOVA table of an OLS fit over two labelled samples.

    Each value of ``list1`` is paired with level ``1`` and each value of
    ``list2`` with level ``2``.  The pairs form a two-column frame on which
    the formula ``observation ~ level`` (built from the frame's own column
    names) is fitted with ``ols``; the table returned by ``anova_lm`` is
    printed.

    Args:
        list1: Observations tagged with level 1.
        list2: Observations tagged with level 2.

    Returns:
        None. The ANOVA table is written to standard output.
    """
    rows = [[1, value] for value in list1]
    rows.extend([2, value] for value in list2)

    frame = pd.DataFrame(np.array(rows), index=None)
    frame.columns = ['水平', '观测值']

    # The response is the second column, the factor the first.
    factor, response = frame.columns
    fitted = ols('{} ~ {}'.format(response, factor), frame).fit()
    print(anova_lm(fitted))

# Open the sales workbook and select the worksheet that holds the data.
# NOTE(review): xlrd 2.x is documented as reading only legacy .xls files;
# confirm the installed xlrd version can still open this .xlsx workbook.
workbook = xlrd.open_workbook('线性回归销量预测.xlsx')
table = workbook.sheet_by_name("Sheet2")
# Disabled debugging aids: alternative sheet lookup and a dump of the
# sheet dimensions and of every column.
#table = workbook.sheet()[1]
#print(table.nrows,table.ncols)
#for i in range(table.ncols):
    #print(table.col_values(i))

# Index of the sheet column used as the predictor; the same number is shown
# in the plot labels as the week count.
week = 2
# Figures for the highlighted product: the plot uses kuai_cai[week-1] as its
# x value and kuai_cai[2] as its y value.
kuai_cai = [140, 830, 3495]
# Only referenced by a commented-out print further down.
to_predicted = kuai_cai[0]

# Read the predictor and target columns, skipping the first (header) row.
x_old = np.array(table.col_values(week, start_rowx=1))
y_old = np.array(table.col_values(3, start_rowx=1))

# The "train" and "test" inputs are both the complete predictor column,
# reshaped into a single-feature 2-D array; no rows are held out.
X_train = x_old.reshape(-1, 1)
X_test = x_old.reshape(-1, 1)

# Likewise, the "train" and "test" targets are both the complete target column.
y_train, y_test = y_old[:], y_old[:]

# Create the linear regression model and fit it on the training arrays
# (fit() returns the estimator itself, so both steps are chained).
regr = linear_model.LinearRegression().fit(X_train, y_train)

# Predict the target for the test inputs with the fitted model.
diabetes_y_pred = regr.predict(X_test)
# Disabled: single-value prediction for to_predicted.
#print(str(to_predicted)+'Lowest is' + str(regr.predict(np.array([to_predicted]).reshape(-1, 1))))
# Report the fitted intercept and coefficient(s).
print(regr.intercept_,regr.coef_)


# Show the observations, the fitted line and the highlighted product in a chart.
plt.rcParams['font.sans-serif']=['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus']=False  # keep the minus sign rendering correctly
plt.scatter(kuai_cai[week-1],kuai_cai[2],color='red',label='快菜（'+str(week)+'周销量，6周销量)',marker='p',s=200)
# NOTE(review): this scatter draws the observed (X_test, y_test) pairs although
# its legend label reads "predicted" — confirm the intended wording.
plt.scatter(X_test, y_test, color='green',label=str(week)+'周预测销量')
plt.plot(X_test, diabetes_y_pred, color='black', linewidth=3 ,label='6周预测销量')

# Relative error at or above which an observation is tied to its fitted value
# by an error line (matches the 50% stated in the legend label).
error_threshold = 0.5
count = 0  # number of flagged points; only the first one carries legend labels
for i in range(len(y_train)):
    actual = y_train[i]
    fitted = diabetes_y_pred[i]
    gap = abs(actual - fitted)
    # Compare against the magnitude of the fitted value: dividing by a negative
    # prediction would make the ratio negative, so such points could never be
    # flagged.  The multiplied form also avoids dividing by a zero prediction.
    exceeds = gap > 0 and gap >= error_threshold * abs(fitted)
    if not exceeds:
        continue
    count = count + 1
    x_pos = X_train[i][0]
    # Label only the first error line / point pair so the legend has one entry each.
    line_legend = {'label': '误差线'} if count == 1 else {}
    point_legend = {'label': '误差大于50%的点'} if count == 1 else {}
    # A vertical segment between the observed and the fitted value, plus both end points.
    plt.plot([x_pos, x_pos], [actual, fitted], color='orange', **line_legend)
    plt.scatter([x_pos, x_pos], [actual, fitted], color='blue', **point_legend)

plt.title(str(week)+"周销量预测6周销量")
plt.xlabel(str(week)+"周销量")
plt.ylabel("6周销量")
plt.legend()  # show the legend (custom ticks via plt.xticks(()) / plt.yticks(()) are disabled)
plt.show()

# Disabled diagnostics: the calls below sit inside a bare string literal, so
# none of them is executed.
'''
print(Corr_Gust(x_old,y_old))
print(R_Square(x_old,y_old))
print(Trim_R2(x_old,y_old,len(x_old),1))
print(S_error(x_old))
'''
# Disabled: ANOVA table for the predictor and target columns.
#V_analysis(x_old,y_old)